//merge transaction data with application data
//step 1. merge application data with transactions to generate an application-level dataset with indicators for transactions we care about
//step 2. drop apps that are not docketed, that receive neither an initial rejection (IR) nor a patent, or that have more than 10 inventors (117k total)

//build the application-level file and attach the transaction indicators
use "${filedata}2014apps_emp_lawyer_inventors_gender.dta", clear
//inventor_id and female are dropped here: presumably they are inventor-level fields
//(confirm against the source file) and stop being meaningful once a single row per
//application is kept
drop inventor_id female
//reduce to one row per application_number; the first row within each application is kept
bysort application_number: keep if _n == 1
//attach the transaction indicators, keeping only applications present in both files;
//nogenerate means no _merge variable is left behind
merge 1:1 application_number using "${filedata}transactions_application_indicators.dta", keep(match) nogenerate
save "${filedata}transactions_application_merged.dta", replace
//2.3m applications

//derive analysis variables, apply the sample restrictions, and save the final file
use "${filedata}transactions_application_merged.dta", clear

//examiner_id is converted from string to numeric in place
destring examiner_id, replace
//numeric, value-labelled versions of the USPC class and subclass strings
encode uspc_class, gen(class) 
encode uspc_subclass , gen(subclass ) 
//calendar year of filing; assumes filing_date is a Stata daily date -- TODO confirm
gen year = year(filing_date)

//only keep docketed apps. drops 29 apps
//(rows where docketed is missing are not dropped by this condition)
drop if docketed==0
//count if initial_rejection ==0 & patent_issued==0
//only keep apps where either there is a rejection or patent is issued. drops 115k apps
drop if initial_rejection ==0 & patent_issued==0
//drop applications with large numbers of inventors (more than 10).
//Stata treats numeric missing as larger than any number, so a bare "inventor_count>10"
//would also drop every app whose inventor_count is missing; the !missing() guard
//restricts the drop to apps that really have more than 10 inventors.
//original note: only 2,921 of these (recorded before the guard was added; re-check)
drop if inventor_count>10 & !missing(inventor_count)

//drop unneeded vars
drop correspondence_name_line_1 correspondence_name_line_2 apptag atty_docket_number
save "${filedata}transactions_application_2023.dta", replace

